package com.trytech.mongoocrawler.client.parser.jd;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.trytech.mongoocrawler.client.common.entity.JDItem;
import com.trytech.mongoocrawler.client.common.queue.UrlFetcherEventProducer;
import com.trytech.mongoocrawler.client.common.util.HttpUtils;
import com.trytech.mongoocrawler.client.parser.HtmlParser;
import com.trytech.mongoocrawler.client.transport.http.WebResult;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Parser for JD (jd.com) book detail pages.
 *
 * <p>Reads the book name, author, publisher, ISBN, language, product number,
 * good-comment rate and comment count from the page HTML, and fetches the
 * selling price from the JD price endpoint using the SKU taken from the page URL.
 *
 * @author Collin Chiang
 * @date 2017-04-15
 */
public class JDBookDetailParser extends HtmlParser<JDItem> {
    /** Price endpoint prefix; the numeric SKU id is appended to it. */
    private static final String PRICE_URL_PREFIX = "https://p.3.cn/prices/get?skuid=J_";

    // Positions of the <li> entries inside the "parameter2" element whose
    // "title" attribute carries the corresponding value.
    private static final int PUBLISHER_INDEX = 0;
    private static final int ISBN_INDEX = 1;
    private static final int ITEM_NO_INDEX = 3;
    private static final int LANGUAGE_INDEX = 12;

    /**
     * Parses a fetched book detail page into a {@link JDItem}.
     *
     * <p>Name, author, price, publisher, ISBN and product number are mandatory:
     * if any of them cannot be extracted the failure is printed and {@code null}
     * is returned. Language, good-comment rate and comment count are optional and
     * are simply left unset when the page does not provide them.
     *
     * @param webResult   fetch result whose data is the page HTML and whose URL is the page URL
     * @param urlProducer not used by this parser
     * @return the populated item, or {@code null} if a mandatory part could not be parsed
     */
    @Override
    public JDItem parse(WebResult webResult, UrlFetcherEventProducer urlProducer) {
        try {
            // The overridden signature uses the raw type, so cast once here.
            @SuppressWarnings("unchecked")
            WebResult<String> result = (WebResult<String>) webResult;
            String html = result.getData();
            String originalUrl = result.getUrl();

            JDItem item = new JDItem();
            item.setUrl(originalUrl);
            Element body = getBody(html);

            fillNameAndAuthor(item, body);

            // Selling price: the endpoint answers with a JSON array whose first
            // object holds the price under the key "p".
            String priceJson = HttpUtils.get(PRICE_URL_PREFIX + parseSkuFromUrl(originalUrl));
            JSONArray priceArr = JSONArray.parseArray(priceJson);
            JSONObject priceItem = priceArr.getJSONObject(0);
            String price = priceItem.getString("p");
            item.setPrice(Float.parseFloat(price));

            fillParameters(item, body);
            fillGoodCommentRate(item, body);
            fillCommentCount(item, body);
            return item;
        } catch (Exception e) {
            // Boundary of the parser: report the failure and signal it with null.
            e.printStackTrace();
            return null;
        }
    }

    /** Reads the book name and the author from the "itemInfo" section (mandatory). */
    private void fillNameAndAuthor(JDItem item, Element body) {
        Element itemInfoEle = requireElement(body.getElementById("itemInfo"), "#itemInfo");
        Element nameFragmentEle = requireElement(itemInfoEle.getElementById("name"), "#name");

        Element titleEle = requireElement(nameFragmentEle.getElementsByTag("h1").first(), "#name h1");
        item.setName(titleEle.text());

        Element authorEle = requireElement(nameFragmentEle.getElementById("p-author"), "#p-author");
        item.setAuthor(authorEle.text());
    }

    /**
     * Reads publisher, ISBN and product number (mandatory) and the language
     * (optional) from the "title" attributes of the "parameter2" list entries.
     */
    private void fillParameters(JDItem item, Element body) {
        Element infoEle = requireElement(body.getElementById("parameter2"), "#parameter2");
        Elements liEle = infoEle.getElementsByTag("li");
        if (liEle.size() <= ITEM_NO_INDEX) {
            throw new IllegalStateException(
                    "Expected at least " + (ITEM_NO_INDEX + 1) + " <li> entries in #parameter2 but found "
                            + liEle.size());
        }

        item.setAgent(liEle.get(PUBLISHER_INDEX).attr("title"));
        item.setIsbn(liEle.get(ISBN_INDEX).attr("title"));
        // The language entry is optional: shorter lists simply do not have it.
        if (liEle.size() > LANGUAGE_INDEX) {
            item.setLanguage(liEle.get(LANGUAGE_INDEX).attr("title"));
        }
        item.setNo(liEle.get(ITEM_NO_INDEX).attr("title"));
    }

    /** Reads the good-comment rate from the "i-comment" section, if present and numeric (optional). */
    private void fillGoodCommentRate(JDItem item, Element body) {
        Element commentEle = body.getElementById("i-comment");
        if (commentEle == null) {
            return;
        }
        Element strongEle = commentEle.getElementsByTag("strong").first();
        if (strongEle == null) {
            return;
        }
        try {
            item.setGoodCommentRate(Short.parseShort(strongEle.text()));
        } catch (NumberFormatException ignored) {
            // Best effort: a non-numeric rate leaves the field unset.
        }
    }

    /** Reads the comment count from the "detail-tab-comm" tab, if present (optional). */
    private void fillCommentCount(JDItem item, Element body) {
        Element commTabEle = body.getElementById("detail-tab-comm");
        if (commTabEle == null) {
            return;
        }
        Element commEle = commTabEle.getElementsByTag("em").first();
        if (commEle == null) {
            return;
        }
        // Drop the surrounding parentheses from the displayed count.
        String commentCount = commEle.text().replaceAll("[\\(\\)]", "");
        item.setCommentCount(commentCount);
    }

    /**
     * Returns the given element, or fails with a message naming what is missing
     * so that the printed stack trace points at the broken selector.
     */
    private static Element requireElement(Element element, String description) {
        if (element == null) {
            throw new IllegalStateException("Missing element " + description + " in book detail page");
        }
        return element;
    }

    /**
     * Extracts the numeric SKU id from a detail page URL.
     *
     * <p>The query string and fragment are cut off first so that digits in them
     * are not appended to the SKU; the digits of the last path segment are then
     * returned. If the path yields no digits at all, the digits of the last
     * segment of the full URL are used instead.
     *
     * @param url the detail page URL
     * @return the digits of the SKU, possibly empty
     */
    private String parseSkuFromUrl(String url) {
        int end = url.length();
        int queryStart = url.indexOf('?');
        if (queryStart >= 0) {
            end = queryStart;
        }
        int fragmentStart = url.indexOf('#');
        if (fragmentStart >= 0 && fragmentStart < end) {
            end = fragmentStart;
        }
        String skuId = digitsOfLastSegment(url.substring(0, end));
        return skuId.isEmpty() ? digitsOfLastSegment(url) : skuId;
    }

    /** Returns only the digits of the last non-empty-trailing "/"-separated segment of the text. */
    private static String digitsOfLastSegment(String text) {
        // split() drops trailing empty strings, so a trailing "/" is ignored.
        String[] segments = text.split("/");
        if (segments.length == 0) {
            return "";
        }
        return segments[segments.length - 1].replaceAll("\\D", "");
    }
}
